Data Access

# clean up the RStudio environment 
rm(list = ls())

# # Loading packages
library(DataComputing)
library(mosaic)
library(tidyverse)
library(lubridate)
library(ggplot2)
library(ggalt)
library(corrplot)
library(plotly)
library(gridExtra)

Since the most recent available data was from 2019, I’m going to compare and contrast using the data of 2015 and 2019.

# Data sources
df2015 <- read.csv("2015.csv")
df2019 <- read.csv("2019.csv")

# Data inspection
head(df2015, 10)  
head(df2019, 10)

summary(df2015)
   Country             Region          Happiness.Rank   Happiness.Score Standard.Error   
 Length:158         Length:158         Min.   :  1.00   Min.   :2.839   Min.   :0.01848  
 Class :character   Class :character   1st Qu.: 40.25   1st Qu.:4.526   1st Qu.:0.03727  
 Mode  :character   Mode  :character   Median : 79.50   Median :5.232   Median :0.04394  
                                       Mean   : 79.49   Mean   :5.376   Mean   :0.04788  
                                       3rd Qu.:118.75   3rd Qu.:6.244   3rd Qu.:0.05230  
                                       Max.   :158.00   Max.   :7.587   Max.   :0.13693  
 Economy..GDP.per.Capita.     Family       Health..Life.Expectancy.    Freedom      
 Min.   :0.0000           Min.   :0.0000   Min.   :0.0000           Min.   :0.0000  
 1st Qu.:0.5458           1st Qu.:0.8568   1st Qu.:0.4392           1st Qu.:0.3283  
 Median :0.9102           Median :1.0295   Median :0.6967           Median :0.4355  
 Mean   :0.8461           Mean   :0.9910   Mean   :0.6303           Mean   :0.4286  
 3rd Qu.:1.1584           3rd Qu.:1.2144   3rd Qu.:0.8110           3rd Qu.:0.5491  
 Max.   :1.6904           Max.   :1.4022   Max.   :1.0252           Max.   :0.6697  
 Trust..Government.Corruption.   Generosity     Dystopia.Residual
 Min.   :0.00000               Min.   :0.0000   Min.   :0.3286   
 1st Qu.:0.06168               1st Qu.:0.1506   1st Qu.:1.7594   
 Median :0.10722               Median :0.2161   Median :2.0954   
 Mean   :0.14342               Mean   :0.2373   Mean   :2.0990   
 3rd Qu.:0.18025               3rd Qu.:0.3099   3rd Qu.:2.4624   
 Max.   :0.55191               Max.   :0.7959   Max.   :3.6021   
summary(df2019)
  Overall.rank    Country.or.region      Score       GDP.per.capita   Social.support 
 Min.   :  1.00   Length:156         Min.   :2.853   Min.   :0.0000   Min.   :0.000  
 1st Qu.: 39.75   Class :character   1st Qu.:4.545   1st Qu.:0.6028   1st Qu.:1.056  
 Median : 78.50   Mode  :character   Median :5.380   Median :0.9600   Median :1.272  
 Mean   : 78.50                      Mean   :5.407   Mean   :0.9051   Mean   :1.209  
 3rd Qu.:117.25                      3rd Qu.:6.184   3rd Qu.:1.2325   3rd Qu.:1.452  
 Max.   :156.00                      Max.   :7.769   Max.   :1.6840   Max.   :1.624  
 Healthy.life.expectancy Freedom.to.make.life.choices   Generosity     Perceptions.of.corruption
 Min.   :0.0000          Min.   :0.0000               Min.   :0.0000   Min.   :0.0000           
 1st Qu.:0.5477          1st Qu.:0.3080               1st Qu.:0.1087   1st Qu.:0.0470           
 Median :0.7890          Median :0.4170               Median :0.1775   Median :0.0855           
 Mean   :0.7252          Mean   :0.3926               Mean   :0.1848   Mean   :0.1106           
 3rd Qu.:0.8818          3rd Qu.:0.5072               3rd Qu.:0.2482   3rd Qu.:0.1412           
 Max.   :1.1410          Max.   :0.6310               Max.   :0.5660   Max.   :0.4530           
nrow(df2015)
[1] 158
ncol(df2015)
[1] 12
nrow(df2019)
[1] 156
ncol(df2019)
[1] 9

I noticed that although the number of rows and columns are not the same for df2015 and df2019, there are not many differences, and I’m going to select the features they both have for future analysis.
Features in both table: happiness rank, country/region, happiness score, GDP, family/social support, Healthy life expectancy, freedom, generosity, and perception of corruption.

Data Wrangling

Year <- c(2015)
Year2015<- data.frame(Year, df2015)
names(Year2015) <- c("Year", "Country", " Region", "Happiness_Rank","Happiness_Score","Standard_Error",  "Economy_GDP_per_Capita","Social_Support","Health_Life_Expectancy","Freedom","Trust_Government_Corruption", "Generosity", "Dystopia_Residual")


Year2 <- c(2019)
Year2019 <- data.frame(Year2, df2019)
names(Year2019) <- c("Year", "Happiness_Rank","Country","Happiness_Score","Economy_GDP_per_Capita","Social_Support","Health_Life_Expectancy","Freedom","Generosity","Trust_Government_Corruption")

New2015 <- Year2015 %>%
  select("Happiness_Rank","Country","Happiness_Score","Economy_GDP_per_Capita","Social_Support","Health_Life_Expectancy","Freedom","Generosity","Trust_Government_Corruption", "Year")

#convert character to numeric
FinalTable <- rbind(New2015, Year2019, by= "Country")
FinalTable[3:10] <- lapply(FinalTable[3:10], as.numeric)
NAs introduced by coercionNAs introduced by coercionNAs introduced by coercionNAs introduced by coercionNAs introduced by coercionNAs introduced by coercionNAs introduced by coercionNAs introduced by coercion
head(New2015)
head(Year2019)
head(FinalTable)
NA

Data Visualization

Overall correlation

data2015 <- FinalTable %>%
  filter(Year=="2015")
cormatrix2015 <- cor(data2015[3:9],use="pairwise.complete.obs")
corrplot(cormatrix2015, method = "number",tl.cex = 0.75, title="Correlation between Happiness scores and other attributs 2015",mar=c(0,0,1,0))


data2019 <- FinalTable %>%
  filter(Year=="2019")

cormatrix2019 <- cor(data2019[3:9],use="pairwise.complete.obs")
corrplot(cormatrix2019, method = "number",tl.cex = 0.75, title="Correlation between Happiness scores and other attributs 2019",mar=c(0,0,1,0))

data2019

Side-by-side boxplot to show how factors related to happiness score have changed over time:

par(mfrow=c(2,3))      
boxplot(Economy_GDP_per_Capita~Year,data=FinalTable)
boxplot(Social_Support~Year,data=FinalTable)
boxplot(Health_Life_Expectancy~Year,data=FinalTable)
boxplot(Freedom~Year,data=FinalTable)
boxplot(Generosity~Year,data=FinalTable)
boxplot(Trust_Government_Corruption~Year,data=FinalTable)

I’m going to list top 10 countries in 2015 and 2019.

#top 10
Top2015 <- data2015 %>% select(Country, Happiness_Score) %>% head(n=10)
Top2019 <- data2019 %>% select(Country, Happiness_Score) %>% head(n=10)

Top2015 <- ggplot(Top2015,aes(x=factor(Country, levels=Country), y=Happiness_Score)) + geom_bar(stat="identity", width = 0.5, fill="darkblue") +labs(title="Top 10 Happiest Countries 2015", x="Country ranked from 1 to 10", y="Score")
Top2019 <- ggplot(Top2019,aes(x=factor(Country, levels=Country), y=Happiness_Score)) +geom_bar(stat="identity", width = 0.5, fill="darkgreen")+labs(title="Top 10 Happiest Countries 2019", x="Country ranked from 1 to 10", y="Score")

grid.arrange(Top2015, Top2019)

Freedom2015 <- plot_ly(data2015, x=~Happiness_Score, 
                     y = ~Freedom, 
                     color = ~Country,
                     size= ~Happiness_Score)%>% 
                    layout(title="Freedom vs. Happiness Score (2015)", 
                           xaxis= list(title = "Happiness Score"),
                           yaxis= list(title = "Freedom"))
Freedom2019 <- plot_ly(data2019, x=~Happiness_Score, 
                     y = ~Freedom, 
                     color = ~Country,
                     size= ~Happiness_Score)%>% 
                    layout(title="Freedom vs. Happiness Score (2019)", 
                           xaxis= list(title = "Happiness Score"),
                           yaxis= list(title = "Freedom"))

htmltools::tagList(list(Freedom2015))

htmltools::tagList(list(Freedom2019))
LS0tDQp0aXRsZTogIkZpbmFsIHByb2plY3QiDQphdXRob3I6ICJZdXhpbiBMaXUiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyMgRGF0YSBBY2Nlc3MNCmBgYHtyfQ0KIyBjbGVhbiB1cCB0aGUgUlN0dWRpbyBlbnZpcm9ubWVudCANCnJtKGxpc3QgPSBscygpKQ0KDQojICMgTG9hZGluZyBwYWNrYWdlcw0KbGlicmFyeShEYXRhQ29tcHV0aW5nKQ0KbGlicmFyeShtb3NhaWMpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShnZ2FsdCkNCmxpYnJhcnkoY29ycnBsb3QpDQpsaWJyYXJ5KHBsb3RseSkNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KYGBgDQoNClNpbmNlIHRoZSBtb3N0IHJlY2VudCBhdmFpbGFibGUgZGF0YSB3YXMgZnJvbSAyMDE5LCBJJ20gZ29pbmcgdG8gY29tcGFyZSBhbmQgY29udHJhc3QgdXNpbmcgdGhlIGRhdGEgb2YgMjAxNSBhbmQgMjAxOS4NCmBgYHtyfQ0KIyBEYXRhIHNvdXJjZXMNCmRmMjAxNSA8LSByZWFkLmNzdigiMjAxNS5jc3YiKQ0KZGYyMDE5IDwtIHJlYWQuY3N2KCIyMDE5LmNzdiIpDQoNCiMgRGF0YSBpbnNwZWN0aW9uDQpoZWFkKGRmMjAxNSwgMTApICANCmhlYWQoZGYyMDE5LCAxMCkNCg0Kc3VtbWFyeShkZjIwMTUpDQpzdW1tYXJ5KGRmMjAxOSkNCg0KbnJvdyhkZjIwMTUpDQpuY29sKGRmMjAxNSkNCm5yb3coZGYyMDE5KQ0KbmNvbChkZjIwMTkpDQpgYGANCkkgbm90aWNlZCB0aGF0IGFsdGhvdWdoIHRoZSBudW1iZXIgb2Ygcm93cyBhbmQgY29sdW1ucyBhcmUgbm90IHRoZSBzYW1lIGZvciBkZjIwMTUgYW5kIGRmMjAxOSwgdGhlcmUgYXJlIG5vdCBtYW55IGRpZmZlcmVuY2VzLCBhbmQgSSdtIGdvaW5nIHRvIHNlbGVjdCB0aGUgZmVhdHVyZXMgdGhleSBib3RoIGhhdmUgZm9yIGZ1dHVyZSBhbmFseXNpcy4gICAgIA0KRmVhdHVyZXMgaW4gYm90aCB0YWJsZTogaGFwcGluZXNzIHJhbmssIGNvdW50cnkvcmVnaW9uLCBoYXBwaW5lc3Mgc2NvcmUsIEdEUCwgZmFtaWx5L3NvY2lhbCBzdXBwb3J0LCBIZWFsdGh5IGxpZmUgZXhwZWN0YW5jeSwgZnJlZWRvbSwgZ2VuZXJvc2l0eSwgYW5kIHBlcmNlcHRpb24gb2YgY29ycnVwdGlvbi4NCg0KDQojIyMgRGF0YSBXcmFuZ2xpbmcNCmBgYHtyfQ0KWWVhciA8LSBjKDIwMTUpDQpZZWFyMjAxNTwtIGRhdGEuZnJhbWUoWWVhciwgZGYyMDE1KQ0KbmFtZXMoWWVhcjIwMTUpIDwtIGMoIlllYXIiLCAiQ291bnRyeSIsICIgUmVnaW9uIiwgIkhhcHBpbmVzc19SYW5rIiwiSGFwcGluZXNzX1Njb3JlIiwiU3RhbmRhcmRfRXJyb3IiLCAgIkVjb25vbXlfR0RQX3Blcl9DYXBpdGEiLCJTb2NpYWxfU3VwcG9ydCIsIkhlYWx0aF9MaWZlX0V4cGVjdGFuY3kiLCJGcmVlZG9tIiwiVHJ1c3RfR292ZXJubWVudF9Db3JydXB0aW9uIiwgIkdlbmVyb3NpdHkiLCAiRHlzdG9waWFfUmVzaWR1YWwiKQ0KDQoNClllYXIyIDwtIGMoMjAxOSkNClllYXIyMDE5IDwtIGRhdGEuZnJhbWUoWWVhcjIsIGRmMjAxOSkNCm5hbWVzKFllYXIyMDE5KSA8LSBjKCJZZWFyIiwgIkhhcHBpbmVzc19SYW5rIiwiQ291bnRyeSIsIkhhcHBpbmVzc19TY29yZSIsIkVjb25vbXlfR0RQX3Blcl9DYXBpdGEiLCJTb2NpYWxfU3VwcG9ydCIsIkhlYWx0aF9MaWZlX0V4cGVjdGFuY3kiLCJGcmVlZG9tIiwiR2VuZXJvc2l0eSIsIlRydXN0X0dvdmVybm1lbnRfQ29ycnVwdGlvbiIpDQoNCk5ldzIwMTUgPC0gWWVhcjIwMTUgJT4lDQogIHNlbGVjdCgiSGFwcGluZXNzX1JhbmsiLCJDb3VudHJ5IiwiSGFwcGluZXNzX1Njb3JlIiwiRWNvbm9teV9HRFBfcGVyX0NhcGl0YSIsIlNvY2lhbF9TdXBwb3J0IiwiSGVhbHRoX0xpZmVfRXhwZWN0YW5jeSIsIkZyZWVkb20iLCJHZW5lcm9zaXR5IiwiVHJ1c3RfR292ZXJubWVudF9Db3JydXB0aW9uIiwgIlllYXIiKQ0KDQojY29udmVydCBjaGFyYWN0ZXIgdG8gbnVtZXJpYw0KRmluYWxUYWJsZSA8LSByYmluZChOZXcyMDE1LCBZZWFyMjAxOSwgYnk9ICJDb3VudHJ5IikNCkZpbmFsVGFibGVbMzoxMF0gPC0gbGFwcGx5KEZpbmFsVGFibGVbMzoxMF0sIGFzLm51bWVyaWMpDQpoZWFkKE5ldzIwMTUpDQpoZWFkKFllYXIyMDE5KQ0KaGVhZChGaW5hbFRhYmxlKQ0KDQpgYGANCg0KDQoNCiMjIyBEYXRhIFZpc3VhbGl6YXRpb24NCk92ZXJhbGwgY29ycmVsYXRpb24NCmBgYHtyfQ0KZGF0YTIwMTUgPC0gRmluYWxUYWJsZSAlPiUNCiAgZmlsdGVyKFllYXI9PSIyMDE1IikNCmNvcm1hdHJpeDIwMTUgPC0gY29yKGRhdGEyMDE1WzM6OV0sdXNlPSJwYWlyd2lzZS5jb21wbGV0ZS5vYnMiKQ0KY29ycnBsb3QoY29ybWF0cml4MjAxNSwgbWV0aG9kID0gIm51bWJlciIsdGwuY2V4ID0gMC43NSwgdGl0bGU9IkNvcnJlbGF0aW9uIGJldHdlZW4gSGFwcGluZXNzIHNjb3JlcyBhbmQgb3RoZXIgYXR0cmlidXRzIDIwMTUiLG1hcj1jKDAsMCwxLDApKQ0KDQpkYXRhMjAxOSA8LSBGaW5hbFRhYmxlICU+JQ0KICBmaWx0ZXIoWWVhcj09IjIwMTkiKQ0KDQpjb3JtYXRyaXgyMDE5IDwtIGNvcihkYXRhMjAxOVszOjldLHVzZT0icGFpcndpc2UuY29tcGxldGUub2JzIikNCmNvcnJwbG90KGNvcm1hdHJpeDIwMTksIG1ldGhvZCA9ICJudW1iZXIiLHRsLmNleCA9IDAuNzUsIHRpdGxlPSJDb3JyZWxhdGlvbiBiZXR3ZWVuIEhhcHBpbmVzcyBzY29yZXMgYW5kIG90aGVyIGF0dHJpYnV0cyAyMDE5IixtYXI9YygwLDAsMSwwKSkNCmRhdGEyMDE5DQpgYGANCg0KDQpTaWRlLWJ5LXNpZGUgYm94cGxvdCB0byBzaG93IGhvdyBmYWN0b3JzIHJlbGF0ZWQgdG8gaGFwcGluZXNzIHNjb3JlIGhhdmUgY2hhbmdlZCBvdmVyIHRpbWU6DQoNCmBgYHtyfQ0KcGFyKG1mcm93PWMoMiwzKSkgICAgICANCmJveHBsb3QoRWNvbm9teV9HRFBfcGVyX0NhcGl0YX5ZZWFyLGRhdGE9RmluYWxUYWJsZSkNCmJveHBsb3QoU29jaWFsX1N1cHBvcnR+WWVhcixkYXRhPUZpbmFsVGFibGUpDQpib3hwbG90KEhlYWx0aF9MaWZlX0V4cGVjdGFuY3l+WWVhcixkYXRhPUZpbmFsVGFibGUpDQpib3hwbG90KEZyZWVkb21+WWVhcixkYXRhPUZpbmFsVGFibGUpDQpib3hwbG90KEdlbmVyb3NpdHl+WWVhcixkYXRhPUZpbmFsVGFibGUpDQpib3hwbG90KFRydXN0X0dvdmVybm1lbnRfQ29ycnVwdGlvbn5ZZWFyLGRhdGE9RmluYWxUYWJsZSkNCmBgYA0KSSdtIGdvaW5nIHRvIGxpc3QgdG9wIDEwIGNvdW50cmllcyBpbiAyMDE1IGFuZCAyMDE5Lg0KDQpgYGB7cn0NCiN0b3AgMTANClRvcDIwMTUgPC0gZGF0YTIwMTUgJT4lIHNlbGVjdChDb3VudHJ5LCBIYXBwaW5lc3NfU2NvcmUpICU+JSBoZWFkKG49MTApDQpUb3AyMDE5IDwtIGRhdGEyMDE5ICU+JSBzZWxlY3QoQ291bnRyeSwgSGFwcGluZXNzX1Njb3JlKSAlPiUgaGVhZChuPTEwKQ0KDQpUb3AyMDE1IDwtIGdncGxvdChUb3AyMDE1LGFlcyh4PWZhY3RvcihDb3VudHJ5LCBsZXZlbHM9Q291bnRyeSksIHk9SGFwcGluZXNzX1Njb3JlKSkgKyBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIHdpZHRoID0gMC41LCBmaWxsPSJkYXJrYmx1ZSIpICtsYWJzKHRpdGxlPSJUb3AgMTAgSGFwcGllc3QgQ291bnRyaWVzIDIwMTUiLCB4PSJDb3VudHJ5IHJhbmtlZCBmcm9tIDEgdG8gMTAiLCB5PSJTY29yZSIpDQpUb3AyMDE5IDwtIGdncGxvdChUb3AyMDE5LGFlcyh4PWZhY3RvcihDb3VudHJ5LCBsZXZlbHM9Q291bnRyeSksIHk9SGFwcGluZXNzX1Njb3JlKSkgK2dlb21fYmFyKHN0YXQ9ImlkZW50aXR5Iiwgd2lkdGggPSAwLjUsIGZpbGw9ImRhcmtncmVlbiIpK2xhYnModGl0bGU9IlRvcCAxMCBIYXBwaWVzdCBDb3VudHJpZXMgMjAxOSIsIHg9IkNvdW50cnkgcmFua2VkIGZyb20gMSB0byAxMCIsIHk9IlNjb3JlIikNCg0KZ3JpZC5hcnJhbmdlKFRvcDIwMTUsIFRvcDIwMTkpDQpgYGANCg0KDQoNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCkZyZWVkb20yMDE1IDwtIHBsb3RfbHkoZGF0YTIwMTUsIHg9fkhhcHBpbmVzc19TY29yZSwgDQogICAgICAgICAgICAgICAgICAgICB5ID0gfkZyZWVkb20sIA0KICAgICAgICAgICAgICAgICAgICAgY29sb3IgPSB+Q291bnRyeSwNCiAgICAgICAgICAgICAgICAgICAgIHNpemU9IH5IYXBwaW5lc3NfU2NvcmUpICU+JSANCiAgICAgICAgICAgICAgICAgICAgbGF5b3V0KHRpdGxlPSJGcmVlZG9tIHZzLiBIYXBwaW5lc3MgU2NvcmUgKDIwMTUpIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICB4YXhpcz0gbGlzdCh0aXRsZSA9ICJIYXBwaW5lc3MgU2NvcmUiKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHlheGlzPSBsaXN0KHRpdGxlID0gIkZyZWVkb20iKSkNCkZyZWVkb20yMDE5IDwtIHBsb3RfbHkoZGF0YTIwMTksIHg9fkhhcHBpbmVzc19TY29yZSwgDQogICAgICAgICAgICAgICAgICAgICB5ID0gfkZyZWVkb20sIA0KICAgICAgICAgICAgICAgICAgICAgY29sb3IgPSB+Q291bnRyeSwNCiAgICAgICAgICAgICAgICAgICAgIHNpemU9IH5IYXBwaW5lc3NfU2NvcmUpICU+JSANCiAgICAgICAgICAgICAgICAgICAgbGF5b3V0KHRpdGxlPSJGcmVlZG9tIHZzLiBIYXBwaW5lc3MgU2NvcmUgKDIwMTkpIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICB4YXhpcz0gbGlzdCh0aXRsZSA9ICJIYXBwaW5lc3MgU2NvcmUiKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHlheGlzPSBsaXN0KHRpdGxlID0gIkZyZWVkb20iKSkNCg0KaHRtbHRvb2xzOjp0YWdMaXN0KGxpc3QoRnJlZWRvbTIwMTUpKQ0KaHRtbHRvb2xzOjp0YWdMaXN0KGxpc3QoRnJlZWRvbTIwMTkpKQ0KYGBgDQoNCg0KDQo=